Introduction

Given a set of perturbed CRISPR candidates at enhancers, generate images to visualize perturbation effects on accessibility and binding.

Computational Setup

#Standard packages
library(rtracklayer) ; library(GenomicRanges); library(magrittr) ; library(Biostrings)
library(ggplot2) ; library(reshape2); library(plyranges); library(Rsamtools); library(parallel)
library(dplyr); library(data.table); library(patchwork); library(readr); library(testit)

#KNITR Options
setwd("/n/projects/mw2098/publications/2024_weilert_acc/code/2_analysis/")
figure_filepath<-"figures/14_plot_crispr"
options(knitr.figure_dir=figure_filepath, java.parameters = "- Xmx6g")

#Lab sources
source("scripts/r/granges_common.r")
source("scripts/r/metapeak_common.r")
source("scripts/r/knitr_common.r")
source("scripts/r/caching.r")
source("scripts/r/metapeak_functions.r")

#Specific sources
library(BSgenome.Mmusculus.UCSC.mm10)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(ggseqlogo)
library(rhdf5)
library(DESeq2)
source("scripts/r/motif_functions.r")

#Pre-existing variables
threads <- 5
bsgenome<-BSgenome.Mmusculus.UCSC.mm10
txdb<-TxDb.Mmusculus.UCSC.mm10.knownGene

#new directories
dir.create(paste0(figure_filepath, '/pisa'), showWarnings = F, recursive = T)
dir.create(paste0(figure_filepath, '/enhancers'), showWarnings = F, recursive = T)

#model information
tasks<-c('oct4', 'sox2', 'nanog', 'klf4', 'zic3')
motif_to_task.list<-list('Oct4-Sox2' = 'oct4', 'Sox2' = 'sox2', 'Klf4' = 'klf4', 'Zic3' = 'zic3', 'Nanog' = 'nanog')
motif_to_task.df<-data.frame(task = motif_to_task.list %>% unlist, motif = names(motif_to_task.list))
output_length <- 1000
input_length<-2032
flank_length<-(input_length - output_length) /2

#experimental data information
#Btbd11 enhancer (OS/S)
coop_perturbs.path<-'tsv/genomic/crispr/crispr_coop_predictions.tsv.gz'
coop_perturbs_w_bias.path<-'tsv/genomic/crispr/crispr_coop_predictions_with_bias.tsv.gz'
coop_scenarios.path<-'tsv/genomic/crispr/crispr_coop_scenarios.tsv.gz'
coop_chrom<-'chr10'
coop_genomic_window<-c(85539375, 85539800)
coop_genomic_window_short<-c(85539500,85539750)

crispr_coop_atac_reps.path<-Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_*_cutsites.bw') %>% grep('distcontrol', ., invert = T, value = T)
crispr_coop_atac_combined.path<-Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_combined_normalized.bw') %>% grep('distcontrol', ., invert = T, value = T)

#Akr1cl enhancer (S/S)
context_perturbs.path<-'tsv/genomic/crispr/crispr_context_predictions.tsv.gz'
context_perturbs_w_bias.path<-'tsv/genomic/crispr/crispr_context_predictions_with_bias.tsv.gz'
context_scenarios.path<-'tsv/genomic/crispr/crispr_context_scenarios.tsv.gz'
context_chrom<-'chr1'
context_genomic_window<-c(65037500, 65038000)

crispr_context_atac_reps.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_*_atac_*_cutsites.bw'),
                                 Sys.glob('bw/mesc_Btbd11_scenario_WT_coop_WT_atac_*_cutsites.bw'))
crispr_context_nexus_reps.path<-c(Sys.glob('bw/mesc_R1_*sox2_nexus_*_positive.bw'),
                                  Sys.glob('bw/mesc_Akr1cl_scenario_context_*sox2_nexus_*_positive.bw')) %>%
  .[!grepl('combined|normalized', .)]

crispr_context_atac_combined.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_*_atac_combined_normalized.bw'),
                                     'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw')

crispr_context_nexus_combined.path<-c(Sys.glob('bw/mesc_Akr1cl_scenario_context_mut_*_sox2_nexus_combined_normalized_positive.bw'),
                                     'bw/mesc_R1_sox2_nexus_combined_normalized_positive.bw')
  

#general paths
motifs.path<-'tsv/mapped_motifs/all_instances_curated_0based_w_perturb.tsv.gz'
motifs.df<-readr::read_tsv(motifs.path)

atac_regions.path<-'narrowpeak/mesc_atac_peaks.narrowPeak'
sox2_regions.path<-'narrowpeak/mesc_sox2_nexus_peaks.narrowPeak'
dev_enhancers.path<-'../../public/published/Avsec_2021_et_al/mm10_enhancer_regions.xlsx'

Read in developmental enhancer information.

dev_enhancers.gr<-readxl::read_xlsx(dev_enhancers.path) %>%
  tidyr::separate(col = 'mm10_coordinates_curated', into = c('seqnames','coord'), sep = ':') %>%
  tidyr::separate(col = 'coord', into = c('start','end'), sep = '-') %>%
  dplyr::filter(!is.na(start), ) %>%
  makeGRangesFromDataFrame(keep.extra.columns = T, seqnames.field = 'seqnames', start.field = 'start', end.field = 'end')

Custom functions

Custom function to compare 2-way interacting DESeq2 model factors for significance. Found at ( https://www.r-bloggers.com/2024/05/a-guide-to-designs-and-contrasts-in-deseq2/).

#Kudos to: https://www.r-bloggers.com/2024/05/a-guide-to-designs-and-contrasts-in-deseq2/
contraster <- function(dds,    # should contain colData and design
                       group1, # list of character vectors each with 2 or more items 
                       group2, # list of character vectors each with 2 or more items
                       weighted = F
){
  
  
  mod_mat <- model.matrix(design(dds), colData(dds)) #create matrix from the DESeq2 model
  
  grp1_rows <- list()
  grp2_rows <- list()
  
  
  for(i in 1:length(group1)){
    
    grp1_rows[[i]] <- colData(dds)[[group1[[i]][1]]] %in% group1[[i]][2:length(group1[[i]])] #Subselect DESeq2 model based on desired grouping indexes
    
  }
  
  
  for(i in 1:length(group2)){
    
    grp2_rows[[i]] <- colData(dds)[[group2[[i]][1]]] %in% group2[[i]][2:length(group2[[i]])] #Subselect DESeq2 model based on desired grouping indexes
    
  }
  
  #Remove any observations that aren't shared
  grp1_rows <- Reduce(function(x, y) x & y, grp1_rows)
  grp2_rows <- Reduce(function(x, y) x & y, grp2_rows)
  
  mod_mat1 <- mod_mat[grp1_rows, ,drop=F]
  mod_mat2 <- mod_mat[grp2_rows, ,drop=F]
  
  if(!weighted){
    
    mod_mat1 <- mod_mat1[!duplicated(mod_mat1),,drop=F]
    mod_mat2 <- mod_mat2[!duplicated(mod_mat2),,drop=F]
    
  }
  
  return(colMeans(mod_mat1)-colMeans(mod_mat2))
}

Visualize Btbd11 enhancer (Avsec et al CRISPR)

Read in perturbation data

perturbs.df<-readr::read_tsv(coop_perturbs_w_bias.path) %>% dplyr::mutate(state = factor(state, c('AB','A','B','null')))
scenarios.df<-readr::read_tsv(coop_scenarios.path) %>%
  dplyr::mutate(scenario_state = paste0(scenario, '_', state))

perturbs.df %>% 
  dplyr::group_by(scenario, state, model_name) %>%
  dplyr::summarize(sum_pred = sum(pred)) %>%
  tidyr::pivot_wider(names_from = state, values_from = sum_pred)
## # A tibble: 30 × 6
## # Groups:   scenario [3]
##    scenario model_name            AB      A      B   null
##    <chr>    <chr>              <dbl>  <dbl>  <dbl>  <dbl>
##  1 WT_coop  atac_0h_fold1     52809. 29734. 44686. 25960.
##  2 WT_coop  atac_12h_fold1    49601. 49302. 41545. 41118.
##  3 WT_coop  atac_15h_fold1    22530. 23674. 20181. 20932.
##  4 WT_coop  atac_3h_fold1     78326. 57549. 72700. 53742.
##  5 WT_coop  atac_6h_fold1     71464. 61467. 59353. 52740.
##  6 WT_coop  atac_9h_fold1     55506. 56850. 51031. 52431.
##  7 WT_coop  atac_wt_fold1     11774.  3424.  9278.  3137.
##  8 WT_coop  atac_wt_fold2      3959.  2011.  2641.  1816.
##  9 WT_coop  atac_wt_fold3     10097.  3348.  5990.  2233.
## 10 WT_coop  bpnet_osknz_fold1 37947. 15303. 14973.  9639.
## # ℹ 20 more rows

Report summary differences

For perturbations and experiments, what are the differences between the different scenarios and states?

Perturbations

#Plot perturbation summaries
perturbs_reads.plot<-rbind(
  perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>% dplyr::mutate(range = 'full_window'),
  perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window_short[1]:coop_genomic_window_short[2])) %>% dplyr::mutate(range = '250bp_window')
) %>%
  dplyr::group_by(scenario, state, range) %>%
  dplyr::summarize(sum_pred = sum(pred)) %>%
  ggplot(., aes(x = state, y = sum_pred))+
  geom_bar(stat = 'identity')+
  facet_grid(scenario ~ range)+
  theme_classic()

coop.df<-rbind(
  perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>% dplyr::mutate(range = 'full_window'),
  perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(coop_genomic_window_short[1]:coop_genomic_window_short[2])) %>% dplyr::mutate(range = '250bp_window')
) %>%
  dplyr::group_by(state, scenario, range) %>%
  dplyr::summarize(preds = sum(pred)) %>%
  tidyr::pivot_wider(names_from = state, values_from = preds) %>%
  dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                log_add_coop = round(log(add_coop), 2),
                AB_vs_B = AB/B,
                log_AB_vs_B = log(AB_vs_B))
print(coop.df)
## # A tibble: 6 × 10
## # Groups:   scenario [3]
##   scenario  range           AB     A     B  null add_coop log_add_coop AB_vs_B
##   <chr>     <chr>        <dbl> <dbl> <dbl> <dbl>    <dbl>        <dbl>   <dbl>
## 1 WT_coop   250bp_window 3848.  817. 2760.  534.     1.32         0.28    1.39
## 2 WT_coop   full_window  5135. 1098. 3685.  760.     1.34         0.29    1.39
## 3 dist_coop 250bp_window 1489.  724. 1022.  481.     1.29         0.25    1.46
## 4 dist_coop full_window  1903.  917. 1351.  637.     1.27         0.24    1.41
## 5 enh_coop  250bp_window 2380.  817. 1925.  534.     1.1          0.1     1.24
## 6 enh_coop  full_window  3147. 1098. 2527.  760.     1.13         0.12    1.25
## # ℹ 1 more variable: log_AB_vs_B <dbl>
perturbs_coop.plot<-ggplot(coop.df, aes(x = scenario, y = log_add_coop))+
  geom_bar(stat = 'identity')+
  facet_grid(. ~ range)+
  theme_classic()

perturbs.plot<-perturbs_reads.plot + perturbs_coop.plot
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_summary.png'), perturbs.plot, height = 6, width = 7)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_summary.pdf'), perturbs.plot, height = 6, width = 7)

Print additional scenarios to verify our cooperativity.

#Assess Oct4 cooperativity in these different scenarios
oct4_coop.df<-perturbs.df %>% 
  dplyr::filter(model_name%in%c('bpnet_osknz_fold1', 'atac_wt_fold1'), 
                genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>%
  dplyr::group_by(task, scenario, state) %>%
  dplyr::summarize(preds = sum(pred)) %>%
  tidyr::pivot_wider(names_from = state, values_from = preds) %>%
  dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                dir_coop = round((AB - B + null)/(A), 2),
                log_add_coop = round(log(add_coop), 2))
print(oct4_coop.df)
## # A tibble: 18 × 9
## # Groups:   task, scenario [18]
##    task  scenario      AB     A     B  null add_coop dir_coop log_add_coop
##    <chr> <chr>      <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>        <dbl>
##  1 atac  WT_coop    5135. 1098. 3685.  760.     1.34     2.01         0.29
##  2 atac  dist_coop  1903.  917. 1351.  637.     1.27     1.3          0.24
##  3 atac  enh_coop   3147. 1098. 2527.  760.     1.13     1.26         0.12
##  4 klf4  WT_coop    1854.  680.  826.  493.     2.62     2.24         0.96
##  5 klf4  dist_coop   501.  487.  357.  340.     0.98     0.99        -0.02
##  6 klf4  enh_coop   1564.  680.  725.  493.     2.56     1.96         0.94
##  7 nanog WT_coop   11906. 4972. 2519. 1594.     2.4      2.21         0.88
##  8 nanog dist_coop  2478. 1963. 1007.  753.     1.18     1.13         0.17
##  9 nanog enh_coop   7478. 4972. 1883. 1594.     1.6      1.45         0.47
## 10 oct4  WT_coop    1485.  479.  694.  345.     2.36     2.37         0.86
## 11 oct4  dist_coop   383.  354.  298.  269.     1        1            0   
## 12 oct4  enh_coop   2231.  479. 1101.  345.     2.12     3.08         0.75
## 13 sox2  WT_coop     936.  395.  312.  192.     2.31     2.07         0.84
## 14 sox2  dist_coop   269.  251.  149.  138.     1.05     1.03         0.05
## 15 sox2  enh_coop   1024.  395.  337.  192.     2.4      2.23         0.88
## 16 zic3  WT_coop    6030. 2200. 1686.  920.     2.5      2.39         0.92
## 17 zic3  dist_coop  1325. 1226.  585.  524.     1.05     1.03         0.05
## 18 zic3  enh_coop   3950. 2200. 1304.  920.     1.82     1.62         0.6
#Assess all cooperativity when Zic3 is absent
coop_w_zic3.df<-readr::read_tsv('tsv/genomic/crispr/crispr_coop_predictions_with_zic3_mut_with_bias.tsv.gz') %>% dplyr::mutate(state = factor(state, c('AB','A','B','null'))) %>%
  dplyr::filter(model_name%in%c('bpnet_osknz_fold1', 'atac_wt_fold1'), 
                genomic_position %in% c(coop_genomic_window[1]:coop_genomic_window[2])) %>%
  dplyr::group_by(task, scenario, state) %>%
  dplyr::summarize(preds = sum(pred)) %>%
  tidyr::pivot_wider(names_from = state, values_from = preds) %>%
  dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                dir_coop = round((AB - B + null)/(A), 2),
                log_add_coop = round(log(add_coop), 2))
print(coop_w_zic3.df)
## # A tibble: 18 × 9
## # Groups:   task, scenario [18]
##    task  scenario     AB     A      B  null add_coop dir_coop log_add_coop
##    <chr> <chr>     <dbl> <dbl>  <dbl> <dbl>    <dbl>    <dbl>        <dbl>
##  1 atac  WT_coop   4306.  775. 2914.  520.      1.43     2.47         0.36
##  2 atac  dist_coop 1371.  589.  909.  400.      1.39     1.46         0.33
##  3 atac  enh_coop  2741.  775. 1915.  520.      1.35     1.74         0.3 
##  4 klf4  WT_coop    963.  275.  467.  196.      2.19     2.51         0.78
##  5 klf4  dist_coop  256.  251.  178.  176.      1.05     1.02         0.05
##  6 klf4  enh_coop   884.  275.  434.  196.      2.17     2.35         0.77
##  7 nanog WT_coop   4672. 1298. 1414.  501.      2.44     2.9          0.89
##  8 nanog dist_coop  912.  722.  451.  355.      1.2      1.13         0.18
##  9 nanog enh_coop  3540. 1298. 1229.  501.      1.99     2.17         0.69
## 10 oct4  WT_coop    803.  201.  383.  149.      2.29     2.84         0.83
## 11 oct4  dist_coop  199.  185.  149.  139.      1.07     1.02         0.07
## 12 oct4  enh_coop  1541.  201.  746.  149.      2.15     4.71         0.77
## 13 sox2  WT_coop    484.  159.  186.   86.5     2.31     2.42         0.84
## 14 sox2  dist_coop  138.  130.   80.0  76.7     1.08     1.04         0.08
## 15 sox2  enh_coop   647.  159.  239.   86.5     2.48     3.1          0.91
## 16 zic3  WT_coop    691.  186.  272.   96.3     2.24     2.77         0.81
## 17 zic3  dist_coop  150.  137.   83.2  77.0     1.11     1.05         0.1 
## 18 zic3  enh_coop   588.  186.  246.   96.3     2.05     2.35         0.72

Experiments

experimental_scenarios<-c('WT')

experimental_reads.df<-lapply(experimental_scenarios, function(y){
  message('CRISPR scenario: ', y)
  
  if(y=='dist'){reps<-2:3} else{reps<-1:2}

  reads.df<-lapply(reps, function(z){
    #Define files
    atac_exp.bw.list<-list(
      'AB' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_', z, '_cutsites_normalized.bw'),
      'A' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_', z, '_cutsites_normalized.bw'),
      'B' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_', z, '_cutsites_normalized.bw'),
      'null' = paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_', z, '_cutsites_normalized.bw')
    )
    if(y=='dist'){atac_exp.bw.list$AB<-paste0('../1_processing/bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_', z, '_cutsites_normalized.bw')}
    
    #Collect reads
    atac_reads.df<-lapply(names(atac_exp.bw.list), function(x){
      
      #Collect reads
      df<-rbind(
        lapply(names(atac_exp.bw.list), function(x){
          gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2]))
          df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
                         state = x,
                         scenario = y,
                         range = 'full_window')
          
        }) %>% rbindlist(),
        lapply(names(atac_exp.bw.list), function(x){
          gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window_short[1], end = coop_genomic_window_short[2]))
          df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
                         state = x,
                         scenario = y,
                         range = '250bp_window')
          
        }) %>% rbindlist()   
      )
      
    }) %>% rbindlist() 
  }) %>% rbindlist()
}) %>% rbindlist()  

#Collect summary values
reads_summary.df<-experimental_reads.df %>% 
  dplyr::group_by(state, scenario, range) %>%
  dplyr::summarize(median_reads = median(reads)) %>% 
  dplyr::mutate(state = factor(state, c('AB','A','B','null')))
exp_reads.plot<-ggplot()+
  geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
  geom_point(data = experimental_reads.df, aes(x = state, y = reads))+
  scale_y_continuous(name = 'RPM norm. observed ATAC-seq reads')+
  scale_x_discrete(name = 'CRISPR line')+
  facet_grid(scenario ~ range)+
  ggtitle('Observed reads')+
  theme_classic()

#Collect cooperativity values from the pooled samples
pooled_reads.df<-lapply(experimental_scenarios, function(y){
  message('CRISPR scenario: ', y)

    #Define files
    atac_exp.bw.list<-list(
      'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
      'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
      'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
      'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
    )
    if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_cutsites_combined_normalized.bw')}

    #Collect reads
    atac_reads.df<-rbind(
      lapply(names(atac_exp.bw.list), function(x){
        gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2]))
        df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
                       state = x,
                       scenario = y,
                       range = 'full_window')
        
      }) %>% rbindlist(),
      lapply(names(atac_exp.bw.list), function(x){
          gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window_short[1], end = coop_genomic_window_short[2]))
        df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
                       state = x,
                       scenario = y,
                       range = '250bp_window')
        
      }) %>% rbindlist()   
    )

}) %>% rbindlist()  

coop_summary.df<-pooled_reads.df %>%
  tidyr::pivot_wider(names_from = state, values_from = reads) %>%
  dplyr::group_by(range) %>%
  dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                log_add_coop = round(log(add_coop), 2),
                AB_vs_B = AB/B,
                log_AB_vs_B = log(AB_vs_B))
print(coop_summary.df)
## # A tibble: 2 × 10
## # Groups:   range [2]
##   scenario range           AB     A     B  null add_coop log_add_coop AB_vs_B
##   <chr>    <chr>        <dbl> <dbl> <dbl> <dbl>    <dbl>        <dbl>   <dbl>
## 1 WT       full_window   20.3 13.1   13.3  8.28     1.22         0.2     1.52
## 2 WT       250bp_window  15.5  9.97  10.3  6.09     1.16         0.15    1.50
## # ℹ 1 more variable: log_AB_vs_B <dbl>
exp_coop.plot<-ggplot(coop_summary.df %>% dplyr::select(scenario, log_add_coop), aes(x = 1, y = log_add_coop))+
  geom_bar(stat = 'identity')+
  facet_grid(scenario ~ range)+
  scale_y_continuous(name = 'Cooperativity')+
  ggtitle('Cooperativity')+
  theme_classic() +theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank()) 
exp.plot<-exp_reads.plot + exp_coop.plot + plot_layout(widths = c(2, 1))
exp.plot

ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_summary.png'), exp.plot, height = 7, width = 7)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_summary.pdf'), exp.plot, height = 7, width = 7)

Assess cooperativity over different ranges

Show how cooperativity changes based on the window that we explore across. There is an upstream cluster of motifs.

coord_plot<-motifs.df %>% dplyr::filter(region_id==97554 | region_id==97555) %>%
  dplyr::select(motif, start, end) %>%
  ggplot(.)+
  geom_hline(yintercept = 0)+
  geom_rect(aes(xmin = start, xmax = end, ymin = -1, ymax = 1))+
  geom_text(aes(x = start, y = 2, label = motif))+
  scale_x_continuous(limits = c(coop_genomic_window[1], coop_genomic_window[1]+1000)) + 
  theme_classic()
ggsave(paste0(figure_filepath, '/Btbd11_coordinate_range.png'), coord_plot, height = 3, width = 12)
ggsave(paste0(figure_filepath, '/Btbd11_coordinate_range.pdf'), coord_plot, height = 3, width = 12)
#Collect cooperativity values from the pooled samples
range_exp.df<-lapply(experimental_scenarios, function(y){
  message('CRISPR scenario: ', y)

    #Define files
    atac_exp.bw.list<-list(
      'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
      'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
      'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
      'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
    )

    #Collect reads
    mclapply(seq(200, 1200, 100), function(z){
      lapply(names(atac_exp.bw.list), function(x){
        gr<-GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])) %>% GenomicRanges::resize(., z, 'start')
        if(end(gr)>max(perturbs.df$genomic_position)) {keep = F} else {keep = T}
        df<-data.frame(exp_reads = regionSums(gr, atac_exp.bw.list[[x]]),
                       pred_reads = perturbs.df %>% dplyr::filter(model_name=='atac_wt_fold1', 
                                                                  task == 'atac', 
                                                                  state == x, 
                                                                  scenario == paste0(y, '_coop'),
                                                                  genomic_position %in% start(gr):end(gr)) %>% 
                       dplyr::mutate(keep = keep) %>% dplyr::mutate(pred_keep = ifelse(keep, pred, NA)) %>% .$pred_keep %>% sum,
                       state = x,
                       scenario = y,
                       range = z)
        return(df)
      }) %>% rbindlist()
    }, mc.cores = 6) %>% rbindlist()
}) %>% rbindlist()  


range_coop_summary.df<-range_exp.df %>%
  dplyr::filter(scenario == 'WT') %>%
  tidyr::pivot_longer(cols = c('exp_reads', 'pred_reads'), names_to = 'type', values_to = 'reads') %>%
  tidyr::pivot_wider(names_from = state, values_from = reads) %>%
  dplyr::group_by(range, type) %>%
  dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                log_add_coop = round(log(add_coop), 2),
                AB_vs_B = AB/B,
                log_AB_vs_B = log(AB_vs_B))
print(range_coop_summary.df)
## # A tibble: 22 × 11
## # Groups:   range, type [22]
##    scenario range type            AB       A      B   null add_coop log_add_coop
##    <chr>    <dbl> <chr>        <dbl>   <dbl>  <dbl>  <dbl>    <dbl>        <dbl>
##  1 WT         200 exp_reads     6.27    3.81 3.35e0 2.66e0     1.96         0.67
##  2 WT         200 pred_reads 1153.    270.   7.65e2 2.01e2     1.5          0.41
##  3 WT         300 exp_reads    14.0     8.81 8.73e0 6.03e0     1.45         0.37
##  4 WT         300 pred_reads 3142.    688.   2.22e3 4.78e2     1.36         0.31
##  5 WT         400 exp_reads    19.3    12.4  1.26e1 7.72e0     1.21         0.19
##  6 WT         400 pred_reads 4762.   1025.   3.41e3 7.03e2     1.34         0.29
##  7 WT         500 exp_reads    23.8    15.9  1.62e1 1.05e1     1.21         0.19
##  8 WT         500 pred_reads 6132.   1366.   4.47e3 1.01e3     1.34         0.29
##  9 WT         600 exp_reads    30.0    21.1  2.21e1 1.52e1     1.16         0.15
## 10 WT         600 pred_reads 7621.   1878.   5.72e3 1.53e3     1.34         0.29
## # ℹ 12 more rows
## # ℹ 2 more variables: AB_vs_B <dbl>, log_AB_vs_B <dbl>
range.plot<-ggplot(range_coop_summary.df, aes(x = range, y = log_add_coop, color = type))+
  geom_line()+
  geom_point()+
  scale_y_continuous(name = 'ln(add_coop.)')+
  scale_x_continuous(name = 'Considered coop. region (bp)\naround S/OS motif pair')+
  theme_classic()
range.plot

ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_ranges.png'), range.plot, height = 3, width = 5)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_experimental_ranges.pdf'), range.plot, height = 3, width = 5)

Perform statistics on the differences.

#Do statistics on these values for significance
stats.df<-lapply(experimental_scenarios, function(x){
  lapply(experimental_reads.df$state %>% unique, function(y){
    ttest<-t.test(experimental_reads.df %>% dplyr::filter(state=='AB', scenario==x) %>% .$reads, 
           experimental_reads.df %>% dplyr::filter(state==y, scenario==x) %>% .$reads, 
           alternative = 'greater')
    message(x, y)
    print(ttest)
    
    data.frame(
      pval = round(ttest$p.value, 3),
      scenario = x,
      state = y,
      comparison = paste0('AB_vs_', y)
    )
  }) %>% rbindlist()
}) %>% rbindlist()
## 
##  Welch Two Sample t-test
## 
## data:  experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 0, df = 30, p-value = 0.5
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -1.4887     Inf
## sample estimates:
## mean of x mean of y 
##  17.87743  17.87743
## 
##  Welch Two Sample t-test
## 
## data:  experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 8.6248, df = 25.639, p-value = 2.357e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  5.105854      Inf
## sample estimates:
## mean of x mean of y 
##  17.87743  11.51212
## 
##  Welch Two Sample t-test
## 
## data:  experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 8.1444, df = 25.964, p-value = 6.38e-09
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  4.782816      Inf
## sample estimates:
## mean of x mean of y 
##  17.87743  11.82756
## 
##  Welch Two Sample t-test
## 
## data:  experimental_reads.df %>% dplyr::filter(state == "AB", scenario == x) %>% .$reads and experimental_reads.df %>% dplyr::filter(state == y, scenario == x) %>% .$reads
## t = 15.7, df = 20.951, p-value = 2.319e-13
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  9.524355      Inf
## sample estimates:
## mean of x mean of y 
## 17.877425  7.180516
print(stats.df)
##     pval scenario  state comparison
##    <num>   <char> <char>     <char>
## 1:   0.5       WT     AB   AB_vs_AB
## 2:   0.0       WT      A    AB_vs_A
## 3:   0.0       WT      B    AB_vs_B
## 4:   0.0       WT   null AB_vs_null

Visualize predictions

Smooth the plots.

acc_plots.list<-mclapply(perturbs.df$scenario %>% unique, function(x){
  
  p.df<-perturbs.df %>% dplyr::filter(scenario==x, grepl('wt', model_name))
  s.df<-scenarios.df %>% dplyr::filter(scenario==x)
  
  #Calculate cooperativity value
  log_add_coop<-p.df %>%
    dplyr::group_by(state) %>%
    dplyr::summarize(preds = sum(pred)) %>%
    tidyr::pivot_wider(names_from = state, values_from = preds) %>%
    dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                  log_add_coop = round(log(add_coop), 2))
  
  acc.plot<-ggplot(p.df) + 
    geom_vline(xintercept = s.df$genomic_center_OS, linetype = 'dashed')+
    geom_vline(xintercept = s.df$genomic_center_S, linetype = 'dashed')+
    stat_smooth(aes(x = genomic_position, y = pred, color = state), method = 'loess', span = .2)+
    scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
    # scale_x_continuous(name = paste0('Genomic position (bp)'))+
    scale_y_continuous(name = 'Predicted ATAC-seq')+
    scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
    ggtitle(p.df$scenario %>% unique, subtitle = paste0('Add coop:', log_add_coop$add_coop, ', Log add coop:', log_add_coop$log_add_coop))+
    facet_grid(. ~ model_name) + 
    theme_classic()
    return(acc.plot)
  })
acc_plots.plot<-wrap_plots(acc_plots.list)+plot_layout(ncol = 1)
acc_plots.plot

ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_smoothed.png'), acc_plots.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_smoothed.pdf'), acc_plots.plot, width = 16, height = 10)

Plot raw data as well.

acc_plots.list<-mclapply(perturbs.df$scenario %>% unique, function(x){
  
  p.df<-perturbs.df %>% dplyr::filter(scenario==x, grepl('wt', model_name))
  s.df<-scenarios.df %>% dplyr::filter(scenario==x)
  
  #Calculate cooperativity value
  log_add_coop<-p.df %>%
    dplyr::group_by(state) %>%
    dplyr::summarize(preds = sum(pred)) %>%
    tidyr::pivot_wider(names_from = state, values_from = preds) %>%
    dplyr::mutate(add_coop = round((AB - null)/(A + B - 2*null), 2),
                  log_add_coop = round(log(add_coop), 2))
  
  acc.plot<-ggplot(p.df) + 
    geom_vline(xintercept = s.df$genomic_center_OS, linetype = 'dashed')+
    geom_vline(xintercept = s.df$genomic_center_S, linetype = 'dashed')+
    geom_line(aes(x = genomic_position, y = pred, color = state))+
    scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
    # scale_x_continuous(name = paste0('Genomic position (bp)'))+
    scale_y_continuous(name = 'Predicted ATAC-seq', limits = c(0, 50))+
    scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
    ggtitle(p.df$scenario %>% unique, subtitle = paste0('Add coop:', log_add_coop$add_coop, ', Log add coop:', log_add_coop$log_add_coop))+
    facet_grid(. ~ model_name) + 
    theme_classic()
  return(acc.plot)
  })
acc_plots.plot<-wrap_plots(acc_plots.list)+plot_layout(ncol = 1)
acc_plots.plot

ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_raw.png'), acc_plots.plot, width = 16, height = 10)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_perturbations_raw.pdf'), acc_plots.plot, width = 16, height = 10)

Visualize experimental data

Plot fragments

experimental_scenarios<-c('WT')

acc_plots.list<-lapply(experimental_scenarios, function(y){
  message('CRISPR scenario: ', y)
  atac_exp.bw.list<-list(
    'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_combined_normalized.bw'),
    'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_combined_normalized.bw'),
    'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_combined_normalized.bw'),
    'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_combined_normalized.bw')
  )
  if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_combined_normalized.bw')}
  
  atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
    standard_metapeak_matrix(GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])),
                             atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
      as.data.frame() %>% transpose() %>%
      dplyr::rename(pred = V1) %>%
      dplyr::mutate(genomic_position = coop_genomic_window[1]:coop_genomic_window[2],
                    sample = x)
  }) %>% rbindlist() %>%
    dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
  
  atac_exp.plot<-ggplot(atac_exp.df) + 
    geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_OS, linetype = 'dashed')+
    geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_S, linetype = 'dashed')+
    geom_line(aes(x = genomic_position, y = pred, color = sample))+
    scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
    scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 3.2))+
    scale_color_manual(name = 'State', values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
    ggtitle(y)+
    theme_classic()
  
  atac_exp.plot
  
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_fragments.png'), atac_exp.plot, width = 7, height = 3)
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_fragments.pdf'), atac_exp.plot, width = 7, height = 3)
  
  print(atac_exp.df %>% dplyr::group_by(sample) %>%
    dplyr::summarize(sum_reads = sum(pred)) %>%
    tidyr::pivot_wider(names_from = sample, values_from = sum_reads) %>%
    dplyr::mutate(coop = log((AB-null)/(A - null + B - null))))
})
## # A tibble: 1 × 5
##      AB     A     B  null  coop
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  904.  663.  595.  470. 0.314

Plot cutsites

experimental_scenarios<-c('WT')

acc_plots.list<-lapply(experimental_scenarios, function(y){
  message('CRISPR scenario: ', y)
  atac_exp.bw.list<-list(
    'AB' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_WT_atac_cutsites_combined_normalized.bw'),
    'A' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_A_atac_cutsites_combined_normalized.bw'),
    'B' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_B_atac_cutsites_combined_normalized.bw'),
    'null' = paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_null_atac_cutsites_combined_normalized.bw')
  )
  if(y=='dist'){atac_exp.bw.list$AB<-paste0('bw/mesc_Btbd11_scenario_', y, '_coop_mut_AB_atac_cutsites_combined_normalized.bw')}
  
  atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
    standard_metapeak_matrix(GRanges(coop_chrom, IRanges(start =coop_genomic_window[1], end = coop_genomic_window[2])),
                             atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
      as.data.frame() %>% transpose() %>%
      dplyr::rename(pred = V1) %>%
      dplyr::mutate(genomic_position = coop_genomic_window[1]:coop_genomic_window[2],
                    sample = x)
  }) %>% rbindlist() %>%
    dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))
  
  atac_exp.plot<-ggplot(atac_exp.df) + 
    geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_OS, linetype = 'dashed')+
    geom_vline(xintercept = scenarios.df %>% dplyr::filter(scenario=='WT_coop') %>% .$genomic_center_S, linetype = 'dashed')+
    stat_smooth(aes(x = genomic_position, y = pred, color = sample), method = 'loess', span = .2)+
    scale_x_continuous(name = paste0('Genomic position (bp)'), limits = coop_genomic_window)+
    scale_y_continuous(name = 'RPM ATAC-seq cutsites(smooth)')+
    scale_color_manual(name = 'State',values = c('#542788', '#2166ac', '#b2182b', 'gray'))+
    ggtitle(y)+
    theme_classic()
  
  atac_exp.plot
  
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_cutsites_smooth.png'), atac_exp.plot, width = 7, height = 3)
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_scenario_', y, '_experiments_RPM_cutsites_smooth.pdf'), atac_exp.plot, width = 7, height = 3)
  
})

Visualize contribution as wildtype

curated_enhancer_boundaries.gr<-GRanges(coop_chrom, IRanges(coop_genomic_window[1], coop_genomic_window[2]))

Generate contribution scores of mapped motifs.

curated_enhancer.df<-data.frame(
  acc_contrib = standard_metapeak_matrix(regions.gr = curated_enhancer_boundaries.gr, sample.cov = 'shap/atac_wt_fold1_atac_counts.bw', 
                                         keep_region_coordinates = T) %>% t(),
  seq = getSeq(bsgenome, curated_enhancer_boundaries.gr, as.character = T) %>% stringr::str_split_1(pattern = ''),
  position = start(curated_enhancer_boundaries.gr):end(curated_enhancer_boundaries.gr)
)

# Generate sequence logo
acc_contrib.plot<-curated_enhancer.df%>%
  tidyr::pivot_wider(names_from = seq, values_from = acc_contrib) %>% 
  dplyr::select(A, C, G, T) %>% as.matrix() %>% t() %>%
ggseqlogo(., method='custom', seq_type='dna') + 
  scale_y_continuous(name = 'Acc. contribution.')+
  scale_x_discrete(name = 'Genomic position (chr10, bp)')+
  ggtitle('CRISCR enhancer')
acc_contrib.plot

ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_candidate_contrib.png'), acc_contrib.plot, height = 2, width = 20)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_candidate_contrib.pdf'), acc_contrib.plot, height = 2, width = 20)

Read in contribution scores and plot

seqs.fa<-seqinr::read.fasta('fasta/crispr_coop_seqs.fa')

filler<-mclapply(Sys.glob('shap/crispr/crispr_coop_*_counts.h5'), function(y){
  contrib.h5<-rhdf5::h5read(y, '/')
  
  hyp_scores.list<-apply(contrib.h5$hyp_scores, 3, function(x){
    as.data.frame(x) %>% transpose()
  })
  input_seqs.list<-apply(contrib.h5$input_seqs, 3, function(x){
    df<-as.data.frame(x)
    df<-dplyr::mutate_all(df, function(x) as.numeric(as.character(x)))
    return(df %>% transpose)
  })
  
  contrib.list<-lapply(1:length(hyp_scores.list), function(x){
    mat<-input_seqs.list[[x]] * hyp_scores.list[[x]]
    colnames(mat)<-c('A','C','G','T')
    return(mat %>% t())
  })
  names(contrib.list)<-names(seqs.fa)
  
  #Visualize contribution scores
  contrib.plot<- ggseqlogo(contrib.list, method='custom', seq_type='dna') + 
    scale_y_continuous(name = 'Contribution') + 
    scale_x_continuous(limits = c(700, 1200)) + 
    facet_wrap(~seq_group, ncol=4, scales='free_x') +
    theme(axis.line = element_line(), axis.ticks = element_line())+
    ggtitle(y)
  
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_contribution_', basename(y), '.png'), contrib.plot, width = 16, height = 10)
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_contribution_', basename(y), '.pdf'), contrib.plot, width = 16, height = 10)
}, mc.cores = 6)

Verify paired cooperativity

Confirm motif sequences are indeed high/low-affinity

readr::read_tsv('tsv/insilico/marginalizations/crispr/btbd11_marginalizations_atac_wt.tsv.gz')
## # A tibble: 2 × 6
##   seqA            motifA seqB  motifB state  atac
##   <chr>           <chr>  <chr> <chr>  <chr> <dbl>
## 1 ATTAGCATTACAATT all    none  empty  A      6.80
## 2 none            all    none  empty  null   6.03
readr::read_tsv('tsv/insilico/marginalizations/crispr/btbd11_marginalizations_bpnet_osknz.tsv.gz')
## # A tibble: 2 × 10
##   seqA            motifA seqB  motifB state  oct4  sox2  klf4 nanog  zic3
##   <chr>           <chr>  <chr> <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ATTAGCATTACAATT all    none  empty  A      6.14  5.16  5.62  6.22  4.79
## 2 none            all    none  empty  null   5.29  4.61  5.31  5.58  4.39
invivo_marg.df<-readr::read_tsv('tsv/insilico/marginalizations/motifs/all_marginalizations_expanded.tsv.gz') %>%
  dplyr::filter(seq %in% c(motifs.df %>% dplyr::filter(region_id == 97547) %>% .$seq, 'ATTAGCATTACAATT'))
invivo_marg.df  %>% as.data.frame
##           seq oct4_marg oct4_inj sox2_marg sox2_inj klf4_marg klf4_inj
## 1 GCTCAGCTGGG 0.0036268 5.295044 0.0027845 4.616916 0.0043634 5.313706
##   nanog_marg nanog_inj zic3_marg zic3_inj atac_wt_marg atac_wt_inj atac_wt_null
## 1  0.0039029  5.586322 0.1526327 4.539115    0.0361964    6.061814     6.025618
##   atac_0h_marg atac_0h_inj atac_0h_null atac_3h_marg atac_3h_inj atac_3h_null
## 1     0.026695    9.447199     9.420504     0.036211    9.683814     9.647603
##   atac_6h_marg atac_6h_inj atac_6h_null atac_9h_marg atac_9h_inj atac_9h_null
## 1     0.030346    9.855091     9.824745     0.031047    9.630625     9.599578
##   atac_12h_marg atac_12h_inj atac_12h_null atac_15h_marg atac_15h_inj
## 1      0.022422     9.730329      9.707907      0.025415     9.496419
##   atac_15h_null motif
## 1      9.471004  Zic3
motifs.df %>% dplyr::filter(seq=='ATTATCATTATAATT') %>% nrow(.)
## [1] 1

Between all other motifs, what does our model think is happening?

pairs.df<-readr::read_tsv('tsv/genomic/all_pairs_curated_0based_w_perturb.tsv.gz')

pairs.df %>% dplyr::filter(region_id == 97554) %>% dplyr::select(motif.x, pattern_center.x, width.x)
## # A tibble: 6 × 3
##   motif.x pattern_center.x width.x
##   <chr>              <dbl>   <dbl>
## 1 Klf4                 560      11
## 2 Klf4                 560      11
## 3 Nanog                536       8
## 4 Nanog                536       8
## 5 Sox2                 441       9
## 6 Sox2                 441       9
pairs.df %>% dplyr::filter(region_id == 97554) %>%
  dplyr::select(motif.x, motif.y,
                `atac_wt/atac/hAB/pred_sum`, `atac_wt/atac/hA/pred_sum`, `atac_wt/atac/hB/pred_sum`, `atac_wt/atac/null/pred_sum`,
                `bpnet_osknz/oct4/hAB/pred_sum`, `bpnet_osknz/oct4/hA/pred_sum`, `bpnet_osknz/oct4/hB/pred_sum`, `bpnet_osknz/oct4/null/pred_sum`) %>%
  dplyr::mutate(atac_coop = ((`atac_wt/atac/hAB/pred_sum`-`atac_wt/atac/null/pred_sum`)/(`atac_wt/atac/hA/pred_sum`+`atac_wt/atac/hB/pred_sum` - 2*`atac_wt/atac/null/pred_sum`)),
                oct4_coop = ((`bpnet_osknz/oct4/hAB/pred_sum` - `bpnet_osknz/oct4/hB/pred_sum` + `bpnet_osknz/oct4/null/pred_sum`)/(`bpnet_osknz/oct4/hA/pred_sum`))) %>%
  as.data.frame
##   motif.x motif.y atac_wt/atac/hAB/pred_sum atac_wt/atac/hA/pred_sum
## 1    Klf4   Nanog                  679.9903                 470.2959
## 2    Klf4    Sox2                  679.9903                 519.5516
## 3   Nanog    Klf4                  679.9903                 409.8858
## 4   Nanog    Sox2                  679.9903                 519.5516
## 5    Sox2    Klf4                  679.9903                 409.8858
## 6    Sox2   Nanog                  679.9903                 470.2959
##   atac_wt/atac/hB/pred_sum atac_wt/atac/null/pred_sum
## 1                 409.8858                   301.5631
## 2                 409.8858                   310.0992
## 3                 470.2959                   301.5631
## 4                 470.2959                   353.0599
## 5                 519.5516                   310.0992
## 6                 519.5516                   353.0599
##   bpnet_osknz/oct4/hAB/pred_sum bpnet_osknz/oct4/hA/pred_sum
## 1                      164.7988                     126.8600
## 2                      164.7988                     166.3281
## 3                      164.7988                     133.3857
## 4                      164.7988                     166.3281
## 5                      164.7988                     133.3857
## 6                      164.7988                     126.8600
##   bpnet_osknz/oct4/hB/pred_sum bpnet_osknz/oct4/null/pred_sum atac_coop
## 1                     133.3857                       115.6963  1.365890
## 2                     133.3857                       137.3726  1.196133
## 3                     126.8600                       115.6963  1.365890
## 4                     126.8600                       131.7498  1.152268
## 5                     166.3281                       137.3726  1.196133
## 6                     166.3281                       131.7498  1.152268
##   oct4_coop
## 1  1.159619
## 2  1.014775
## 3  1.151810
## 4  1.020204
## 5  1.018425
## 6  1.026489

Supplemental validations

Replicate validation

Compare CRISPR ATAC-seq samples to make sure they’re replicable. So only compare replicates here.

atac_regions.gr<-rtracklayer::import(atac_regions.path) %>% 
  plyranges::mutate(across_Btbd11 = ifelse(overlapsAny(., GRanges(coop_chrom, IRanges(start = coop_genomic_window[1], end = coop_genomic_window[2]))), 'Btbd11', 'none'),
                    across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Btbd11))  %>% arrange(desc(across_enhancer))

#Collect coverage
crispr_coop_cov.df<-mclapply(crispr_coop_atac_reps.path, function(x){
  regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_coop_cov.df)<-crispr_coop_atac_reps.path

#Consolidate values
crispr_coop_cov_w_coord.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_coop_cov.df) %>% arrange(desc(across_enhancer))

#Plot the sequencing depth imbalance.
plot.list<-lapply(Sys.glob('bw/mesc_Btbd11_scenario_WT_*_atac_1_cutsites.bw') %>% grep('distcontrol', ., value = T, invert = T), function(x){
  crispr_coop_cov_w_coord.df$repa<-crispr_coop_cov_w_coord.df[[x]]
  crispr_coop_cov_w_coord.df$repb<-crispr_coop_cov_w_coord.df[[gsub('_1_', '_2_', x)]]

  plot<-ggplot(data = crispr_coop_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_coop_cov_w_coord.df %>% dplyr::filter(across_Btbd11_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
    scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
    ggtitle(basename(x) %>% gsub('_atac_1_cutsites.bw', '', .))+
    theme_classic() + theme(legend.position = 'none')
  
  return(plot)
})

rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 1, ncol = 4)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_rep_comparison.png'), rep.plot, height = 6, width = 12)
ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_rep_comparison.pdf'), rep.plot, height = 6, width = 12)

Developmental enhancer consistency

Do our conditions and CRISPR mutants look consistent across regions of interest that are not the CRISPR locus?

filler<-lapply(1:length(dev_enhancers.gr), function(x){
  
  metapeak.df<-mclapply(crispr_coop_atac_combined.path, function(y){
    standard_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), y, downstream = 501, upstream = 500) %>%
      dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name)
  }, mc.cores = 8) %>% rbindlist() %>%
    dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Btbd11_scenario_', '', .)%>% gsub('_atac_combined_normalized.bw', '', .),
                  genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
    tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = '_coop_', remove = F) %>%
    dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
    
  
  enhancer_name<-dev_enhancers.gr[x]$name
  plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state))+
    geom_line()+
    scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) + 
    scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 5))+
    scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
    facet_grid(. ~ scenario)+
    ggtitle(enhancer_name)+
    theme_classic()
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Btbd11_crispr_coop_comparison.png'), plot, height = 4, width = 8)
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Btbd11_crispr_coop_comparison.pdf'), plot, height = 4, width = 8)
  return(NULL)
})

CRISPR mutant differential enrichment

Run DESeq2 in order to measure whether CRISPR scenarios are significantly differentially enriched when we do these mutations. Since we are comparing a few features, we will contrast a couple assessments first.

#Format organization
atac_labels.df<-data.frame(filename = crispr_coop_atac_reps.path) %>%
  dplyr::mutate(scenario_state = basename(crispr_coop_atac_reps.path) %>% gsub('mesc_Btbd11_scenario_', '', .) %>% gsub('_cutsites.bw', '', .)) %>%
  # tidyr::separate(state, into = c('timepoint','replicate'), remove = F)
    tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = '_coop_', remove = F) %>%
    tidyr::separate(col = 'state', into = c('state','rep'), sep = '_atac_', remove = T) %>%
    dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
rownames(atac_labels.df)<-crispr_coop_atac_reps.path
atac_labels.df
##                                                                                                                filename
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw             bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw             bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw
##                                                                      scenario_state
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw       WT_coop_mut_A_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw       WT_coop_mut_A_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw       WT_coop_mut_B_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw       WT_coop_mut_B_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw WT_coop_mut_null_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw WT_coop_mut_null_atac_2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw             WT_coop_WT_atac_1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw             WT_coop_WT_atac_2
##                                                             scenario state rep
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_1_cutsites.bw          WT     A   1
## bw/mesc_Btbd11_scenario_WT_coop_mut_A_atac_2_cutsites.bw          WT     A   2
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_1_cutsites.bw          WT     B   1
## bw/mesc_Btbd11_scenario_WT_coop_mut_B_atac_2_cutsites.bw          WT     B   2
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_1_cutsites.bw       WT  null   1
## bw/mesc_Btbd11_scenario_WT_coop_mut_null_atac_2_cutsites.bw       WT  null   2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw             WT    WT   1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw             WT    WT   2
#Consolidate counts
atac.df<-crispr_coop_cov.df
colnames(atac.df)<-crispr_coop_atac_reps.path

#Run DESeq2
dds <- DESeqDataSetFromMatrix(countData = atac.df,
                              colData = atac_labels.df,
                              design = ~ state)
dds <- DESeq(dds)

Show enrichment results.

Within each scenario, do we see significance from the WT?

deseq_comparisons.df<-lapply(c('WT'), function(x){
  lapply(c('A','B','null'), function(y){
    res <- results(dds, contrast = contraster(dds,
                                              group1 = list(c("scenario", x), c("state", "WT")),
                                              group2 = list(c("scenario", x), c("state", y)))) %>%
      cbind(., atac_regions.gr %>% as.data.frame()) %>% as.data.frame %>% 
      dplyr::mutate(signif = cut(padj, breaks = c(0, 0.001, 0.01, 0.05, 1), include.lowest = T, labels = c('***','**','*',''))) %>%
      dplyr::filter(across_enhancer == 'Btbd11') %>% 
      dplyr::mutate(state = y, scenario = x, comparison = paste0(x, ': WT vs ', y))
  }) %>% rbindlist()
}) %>% rbindlist()                

deseq_comparisons.df
##    baseMean log2FoldChange      lfcSE      stat       pvalue         padj
##       <num>          <num>      <num>     <num>        <num>        <num>
## 1:  3693.95      0.3734261 0.06608023  5.651101 1.594232e-08 9.315719e-07
## 2:  3693.95      0.1880535 0.06588973  2.854064 4.316379e-03 1.042809e-02
## 3:  3693.95      0.6725339 0.06679151 10.069153 7.562689e-24 7.506964e-21
##    seqnames    start      end width strand   name score signalValue  pValue
##      <fctr>    <int>    <int> <int> <fctr> <char> <num>       <num>   <num>
## 1:    chr10 85539345 85540344  1000      *   <NA>  1000     8.51039 710.946
## 2:    chr10 85539345 85540344  1000      *   <NA>  1000     8.51039 710.946
## 3:    chr10 85539345 85540344  1000      *   <NA>  1000     8.51039 710.946
##     qValue  peak across_Btbd11 across_enhancer signif  state scenario
##      <num> <int>        <char>          <char> <fctr> <char>   <char>
## 1: 706.851   500        Btbd11          Btbd11    ***      A       WT
## 2: 706.851   500        Btbd11          Btbd11      *      B       WT
## 3: 706.851   500        Btbd11          Btbd11    ***   null       WT
##        comparison
##            <char>
## 1:    WT: WT vs A
## 2:    WT: WT vs B
## 3: WT: WT vs null

Visualize comparison plots with statistics attached:

#Collect coverage
crispr_coop_cov_combined.df<-mclapply(crispr_coop_atac_combined.path, function(x){
  log(regionSums(atac_regions.gr, x))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_coop_cov_combined.df)<-crispr_coop_atac_combined.path

#Consolidate values
crispr_coop_cov_w_coord_combined.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_coop_cov_combined.df) %>% arrange(desc(across_enhancer))

#Plot the sequencing depth imbalance.
plot.list<-lapply(c('WT'), function(x){
  compare.plot.list<-lapply(c('A','B','null'), function(y){

    wt_sample<-paste0('bw/mesc_Btbd11_scenario_', x, '_coop_WT_atac_combined_normalized.bw')
    mut_sample<-paste0('bw/mesc_Btbd11_scenario_', x, '_coop_mut_', y, '_atac_combined_normalized.bw')
    
    deseq.df<-deseq_comparisons.df %>% dplyr::filter(scenario == x, state ==y) %>%
      data.frame(label = paste0('p=', .$padj, ', ', .$signif))
    
    plot<-ggplot(data = crispr_coop_cov_w_coord_combined.df, aes(x = .data[[wt_sample]], y = .data[[mut_sample]]))+
      geom_abline(slope = 1, intercept = 0)+
      ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
      # geom_point(data = crispr_coop_cov_w_coord.df %>% dplyr::filter(across_Btbd11_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
      ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
      geom_text(data = deseq.df, aes(x = 8, y = 4, label = label))+
      ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
      scale_color_manual(values = c( 'red', 'navy', 'gray'))+
      scale_x_continuous(name = paste0(' log-reads across peaks'))+
      scale_y_continuous(name = paste0(' log-reads across peaks'))+
      ggtitle(paste0('Scenario: ', x, ', WT vs ', y, ' comparisons'))+
      theme_classic() + theme(legend.position = 'none')
      return(plot)
  })
  
  compare.plot<-wrap_plots(compare.plot.list) + plot_layout(nrow = 1, ncol = 3)
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_diff_comparison_scenario_', x, '.png'), compare.plot, height = 3, width = 9)
  ggsave(paste0(figure_filepath, '/Btbd11_crispr_coop_diff_comparison_scenario_', x, '.pdf'), compare.plot, height = 3, width = 9)
})

Visualize Akr1cl CRISPR

Read in perturbation data

perturbs.df<-readr::read_tsv(context_perturbs_w_bias.path) %>% dplyr::mutate(state = factor(state, c('AB','A','B'))) %>% dplyr::mutate(pred_stranded = ifelse(channel==0, pred, pred*-1))
scenarios.df<-readr::read_tsv(context_scenarios.path)

perturbs.df %>% 
  dplyr::group_by(state, model_name, task) %>%
  dplyr::summarize(sum_pred = sum(pred)) %>%
  tidyr::pivot_wider(names_from = state, values_from = sum_pred)
## # A tibble: 24 × 5
## # Groups:   model_name [12]
##    model_name        task      AB      A      B
##    <chr>             <chr>  <dbl>  <dbl>  <dbl>
##  1 atac_0h_fold1     atac  10932. 10209. 10060.
##  2 atac_12h_fold1    atac   9436.  9158.  9335.
##  3 atac_15h_fold1    atac   8429.  8041.  8033.
##  4 atac_3h_fold1     atac  15885. 14440. 15494.
##  5 atac_6h_fold1     atac  16919. 16014. 15792.
##  6 atac_9h_fold1     atac   9779.  9550.  9241.
##  7 atac_wt_fold1     atac   1064.   613.  1313.
##  8 atac_wt_fold2     atac   1681.  1331.  1331.
##  9 atac_wt_fold3     atac    299.   242.   278.
## 10 bpnet_osknz_fold1 klf4    271.   238.   239.
## # ℹ 14 more rows

Report relative differences

results.df<-perturbs.df %>%
    dplyr::group_by(state, model_name, task) %>%
    dplyr::summarize(preds = sum(pred)) %>%
    tidyr::pivot_wider(names_from = state, values_from = preds) %>%
    dplyr::mutate(AB_vs_B = AB/B,
                  log_AB_vs_B = log(AB_vs_B),
                  AB_vs_A = AB/A,
                  log_AB_vs_A = log(AB_vs_A),)
print(results.df)
## # A tibble: 24 × 9
## # Groups:   model_name [12]
##    model_name task      AB      A      B AB_vs_B log_AB_vs_B AB_vs_A log_AB_vs_A
##    <chr>      <chr>  <dbl>  <dbl>  <dbl>   <dbl>       <dbl>   <dbl>       <dbl>
##  1 atac_0h_f… atac  10932. 10209. 10060.   1.09       0.0832    1.07      0.0685
##  2 atac_12h_… atac   9436.  9158.  9335.   1.01       0.0108    1.03      0.0299
##  3 atac_15h_… atac   8429.  8041.  8033.   1.05       0.0482    1.05      0.0472
##  4 atac_3h_f… atac  15885. 14440. 15494.   1.03       0.0249    1.10      0.0954
##  5 atac_6h_f… atac  16919. 16014. 15792.   1.07       0.0689    1.06      0.0549
##  6 atac_9h_f… atac   9779.  9550.  9241.   1.06       0.0566    1.02      0.0236
##  7 atac_wt_f… atac   1064.   613.  1313.   0.810     -0.211     1.74      0.551 
##  8 atac_wt_f… atac   1681.  1331.  1331.   1.26       0.234     1.26      0.234 
##  9 atac_wt_f… atac    299.   242.   278.   1.07       0.0707    1.24      0.211 
## 10 bpnet_osk… klf4    271.   238.   239.   1.14       0.127     1.14      0.128 
## # ℹ 14 more rows
results.df %>% dplyr::select(model_name,task, AB_vs_A, log_AB_vs_A, AB_vs_B, log_AB_vs_B)
## # A tibble: 24 × 6
## # Groups:   model_name [12]
##    model_name        task  AB_vs_A log_AB_vs_A AB_vs_B log_AB_vs_B
##    <chr>             <chr>   <dbl>       <dbl>   <dbl>       <dbl>
##  1 atac_0h_fold1     atac     1.07      0.0685   1.09       0.0832
##  2 atac_12h_fold1    atac     1.03      0.0299   1.01       0.0108
##  3 atac_15h_fold1    atac     1.05      0.0472   1.05       0.0482
##  4 atac_3h_fold1     atac     1.10      0.0954   1.03       0.0249
##  5 atac_6h_fold1     atac     1.06      0.0549   1.07       0.0689
##  6 atac_9h_fold1     atac     1.02      0.0236   1.06       0.0566
##  7 atac_wt_fold1     atac     1.74      0.551    0.810     -0.211 
##  8 atac_wt_fold2     atac     1.26      0.234    1.26       0.234 
##  9 atac_wt_fold3     atac     1.24      0.211    1.07       0.0707
## 10 bpnet_osknz_fold1 klf4     1.14      0.128    1.14       0.127 
## # ℹ 14 more rows

Report summary differences

For perturbations and experiments, what are the differences between the different scenarios and states?

Perturbations

#Plot perturbation summaries
perturbs_reads.plot<-perturbs.df %>% 
  dplyr::filter(model_name=='atac_wt_fold1', genomic_position %in% c(context_genomic_window[1]:context_genomic_window[2])) %>%
  dplyr::group_by(state) %>%
  dplyr::summarize(sum_pred = sum(pred)) %>%
  ggplot(., aes(x = state, y = sum_pred))+
  geom_bar(stat = 'identity')+
  coord_flip()+
  theme_classic()
perturbs_reads.plot

ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_perturbations_summary.png'), perturbs_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_perturbations_summary.pdf'), perturbs_reads.plot, width = 7, height = 3)

Experiments

reads.df<-lapply(1:2, function(z){
  #Define files
  atac_exp.bw.list<-list(
    'AB' = paste0('../1_processing/bw/mesc_Btbd11_scenario_WT_coop_WT_atac_', z, '_cutsites_normalized.bw'),
    'A' = paste0('../1_processing/bw/mesc_Akr1cl_scenario_context_mut_A_atac_', z, '_cutsites_normalized.bw'),
    'B' = paste0('../1_processing/bw/mesc_Akr1cl_scenario_context_mut_B_atac_', z, '_cutsites_normalized.bw')
  )
  
  #Collect reads
  atac_reads.df<-lapply(names(atac_exp.bw.list), function(x){
    gr<-GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2]))
    df<-data.frame(reads = regionSums(gr, atac_exp.bw.list[[x]]),
                   state = x,
                   rep = z)
  }) %>% rbindlist() 
}) %>% rbindlist()

#Collect summary values
reads_summary.df<-reads.df %>% 
  dplyr::group_by(state) %>%
  dplyr::summarize(median_reads = median(reads))
exp_reads.plot<-ggplot()+
  geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
  geom_point(data = reads.df, aes(x = state, y = reads))+
  coord_flip()+
  theme_classic()
exp_reads.plot

ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary.png'), exp_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary.pdf'), exp_reads.plot, width = 7, height = 3)

Process relevant ChIP-nexus data, normalize it, and summarize reads:

nexus_rep.rds.list<-list(
    'AB' = c('../1_processing/rdata/mesc_R1_sox2_nexus_15.granges.rds', 
             '../1_processing/rdata/mesc_R1_sox2_nexus_16.granges.rds'),
    'A' = c('../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_1.granges.rds',
            '../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_2.granges.rds',
            '../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_3.granges.rds'),
    'B' = c('../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_1.granges.rds',
            '../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_2.granges.rds',
            '../1_processing/rdata/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_3.granges.rds')
    )

reads.df<-mclapply(names(nexus_rep.rds.list), function(z){
  
  #Collect reads
  nexus_reads.df<-lapply(nexus_rep.rds.list[[z]], function(x){
    rds<-readRDS(x) %>% resize(., 1, 'start')
    total_reads<-length(rds)
    gr<-GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2]))
    
    reads_in_region<-overlapsAny(rds, gr, ignore.strand = T) %>% sum()
    reads_in_region_rpm<-reads_in_region/total_reads * 1000000
    
    df<-data.frame(reads = reads_in_region_rpm,
                   rep = x,
                   state = z)
  }) %>% rbindlist() 
}, mc.cores = 3) %>% rbindlist()

#Collect summary values
reads_summary.df<-reads.df %>% 
  dplyr::group_by(state) %>%
  dplyr::summarize(median_reads = median(reads))
exp_reads.plot<-ggplot()+
  geom_bar(data = reads_summary.df, aes(x = state, y = median_reads), stat = 'identity')+
  geom_point(data = reads.df, aes(x = state, y = reads))+
  coord_flip()+
  theme_classic()
exp_reads.plot

ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary_nexus.png'), exp_reads.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Ark1cl_crispr_context_experimental_summary_nexus.pdf'), exp_reads.plot, width = 7, height = 3)

t.test(reads.df %>% dplyr::filter(state == 'AB') %>% .$reads, reads.df %>% dplyr::filter(state == 'B') %>% .$reads)
## 
##  Welch Two Sample t-test
## 
## data:  reads.df %>% dplyr::filter(state == "AB") %>% .$reads and reads.df %>% dplyr::filter(state == "B") %>% .$reads
## t = 6.2929, df = 2.9924, p-value = 0.008167
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   5.650742 17.247514
## sample estimates:
## mean of x mean of y 
## 18.184660  6.735532
t.test(reads.df %>% dplyr::filter(state == 'AB') %>% .$reads, reads.df %>% dplyr::filter(state == 'A') %>% .$reads)
## 
##  Welch Two Sample t-test
## 
## data:  reads.df %>% dplyr::filter(state == "AB") %>% .$reads and reads.df %>% dplyr::filter(state == "A") %>% .$reads
## t = 14.015, df = 1.677, p-value = 0.009796
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  10.28456 22.43036
## sample estimates:
## mean of x mean of y 
##  18.18466   1.82720

Visualize predictions

Plot raw data.

acc.plot<-ggplot(perturbs.df %>% dplyr::filter(task %in% c('atac', 'sox2'), grepl('atac_wt|bpnet', model_name))) + 
  geom_vline(xintercept = scenarios.df$genomic_center_distal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df$genomic_center_proximal, linetype = 'dashed')+
  geom_line(aes(x = genomic_position, y = pred_stranded, color = state, group = interaction(channel, state)))+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  scale_y_continuous(name = 'Predicted ATAC-seq')+
  scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  facet_grid(model_name ~ task, scales = 'free') + 
  theme_classic()
acc.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_raw.png'), acc.plot, width = 10, height = 20)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_raw.pdf'), acc.plot, width = 10, height = 20)

Plot smoothed data

acc.plot<-ggplot(perturbs.df %>% dplyr::filter(model_name %in% c('atac_wt_fold1'))) + 
  geom_vline(xintercept = scenarios.df$genomic_center_distal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df$genomic_center_proximal, linetype = 'dashed')+
      stat_smooth(aes(x = genomic_position, y = pred_stranded, color = state, group = interaction(channel, state)), method = 'loess', span = .2)+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  scale_y_continuous(name = 'Predicted ATAC-seq')+
  scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  facet_grid(model_name ~ task, scales = 'free') + 
  theme_classic()
acc.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_smoothed.png'), acc.plot, width = 10, height = 5)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_perturbations_smoothed.pdf'), acc.plot, width = 10, height = 5)

Visualize experimental data

Plot accessibility fragments

atac_exp.bw.list<-list(
  'AB' = 'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw',
  'A' = 'bw/mesc_Akr1cl_scenario_context_mut_A_atac_combined_normalized.bw',
  'B' = 'bw/mesc_Akr1cl_scenario_context_mut_B_atac_combined_normalized.bw'
)

atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
  standard_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
                           atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
    as.data.frame() %>% transpose() %>%
    dplyr::rename(pred = V1) %>%
    dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
                  sample = x)
}) %>% rbindlist() %>%
  dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))

atac_exp.plot<-ggplot(atac_exp.df) +
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
  geom_line(aes(x = genomic_position, y = pred, color = sample))+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  # scale_x_continuous(name = paste0('Genomic position (bp)'))+
  scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments')+
  scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  theme_classic()

atac_exp.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_fragments.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_fragments.pdf'), atac_exp.plot, width = 7, height = 3)

#Report relative fold change impacts
atac_exp.df %>% dplyr::group_by(sample) %>%
  dplyr::summarize(sum_reads = sum(pred))
## # A tibble: 3 × 2
##   sample sum_reads
##   <fct>      <dbl>
## 1 AB          434.
## 2 A           124.
## 3 B           376.

Plot accessibility cutsites

atac_exp.bw.list<-list(
  'AB' = 'bw/mesc_Btbd11_scenario_WT_coop_WT_atac_cutsites_combined_normalized.bw',
  'A' = 'bw/mesc_Akr1cl_scenario_context_mut_A_atac_cutsites_combined_normalized.bw',
  'B' = 'bw/mesc_Akr1cl_scenario_context_mut_B_atac_cutsites_combined_normalized.bw'
)

atac_exp.df<-lapply(names(atac_exp.bw.list), function(x){
  standard_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
                           atac_exp.bw.list[[x]], keep_region_coordinates = T) %>%
    as.data.frame() %>% transpose() %>%
    dplyr::rename(pred = V1) %>%
    dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
                  sample = x)
}) %>% rbindlist() %>%
  dplyr::mutate(sample = factor(sample, names(atac_exp.bw.list)))

atac_exp.plot<-ggplot(atac_exp.df) +
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
  geom_line(aes(x = genomic_position, y = pred, color = sample), alpha = .5)+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  # scale_x_continuous(name = paste0('Genomic position (bp)'))+
  scale_y_continuous(name = 'RPM-normalized ATAC-seq cuts')+
  scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  theme_classic()
atac_exp.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_raw.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_raw.pdf'), atac_exp.plot, width = 7, height = 3)

atac_exp.plot<-ggplot(atac_exp.df) +
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
  stat_smooth(aes(x = genomic_position, y = pred, color = sample), method = 'loess', span = .2)+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  # scale_x_continuous(name = paste0('Genomic position (bp)'))+
  scale_y_continuous(name = 'RPM-normalized ATAC-seq cuts')+
  scale_color_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  theme_classic()
atac_exp.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_smoothed.png'), atac_exp.plot, width = 7, height = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_cutsites_smoothed.pdf'), atac_exp.plot, width = 7, height = 3)

Plot ChIP-nexus data

nexus_exp.bw.list<-list(
  'AB_sox2' = list(pos = 'bw/mesc_R1_sox2_nexus_combined_normalized_positive.bw',
                   neg = 'bw/mesc_R1_sox2_nexus_combined_normalized_negative.bw'),
  'A_sox2' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_combined_normalized_positive.bw',
                  neg = 'bw/mesc_Akr1cl_scenario_context_mut_A_sox2_nexus_combined_normalized_negative.bw'),
  'B_sox2' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_combined_normalized_positive.bw',
             neg = 'bw/mesc_Akr1cl_scenario_context_mut_B_sox2_nexus_combined_normalized_negative.bw'),
  
  'AB_klf4' = list(pos = 'bw/mesc_R1_klf4_nexus_combined_normalized_positive.bw',
                   neg = 'bw/mesc_R1_klf4_nexus_combined_normalized_negative.bw'),
  'A_klf4' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_A_klf4_nexus_combined_normalized_positive.bw',
                  neg = 'bw/mesc_Akr1cl_scenario_context_mut_A_klf4_nexus_combined_normalized_negative.bw'),
  'B_klf4' = list(pos = 'bw/mesc_Akr1cl_scenario_context_mut_B_klf4_nexus_combined_normalized_positive.bw',
             neg = 'bw/mesc_Akr1cl_scenario_context_mut_B_klf4_nexus_combined_normalized_negative.bw')
)

nexus_exp.df<-lapply(names(nexus_exp.bw.list), function(x){
  mat<-exo_metapeak_matrix(GRanges(context_chrom, IRanges(start =context_genomic_window[1], end = context_genomic_window[2])),
                           nexus_exp.bw.list[[x]], keep_region_coordinates = T) 
  
  df<-rbind(
    mat$pos %>%
    as.data.frame() %>% transpose() %>%
    dplyr::rename(pred = V1) %>%
    dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
                  sample = x,
                  channel = 'pos'),
    mat$neg %>%
    as.data.frame() %>% transpose() %>%
    dplyr::rename(pred = V1) %>%
    dplyr::mutate(genomic_position = context_genomic_window[1]:context_genomic_window[2],
                  sample = x,
                  pred = pred*-1,
                  channel = 'neg')
  )
  return(df)
}) %>% rbindlist() %>%
  dplyr::mutate(sample = factor(sample, names(nexus_exp.bw.list))) %>%
  tidyr::separate(sample, c('state', 'TF'), sep = '_') %>%
  dplyr::mutate(state = factor(state, levels = c('AB','A','B')))

nexus_exp.plot<-ggplot(nexus_exp.df) +
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_proximal, linetype = 'dashed')+
  geom_vline(xintercept = scenarios.df %>% .$genomic_center_distal, linetype = 'dashed')+
  geom_area(aes(x = genomic_position, y = pred, fill = state, group = interaction(state, channel)))+
  scale_x_continuous(name = paste0('Genomic position (bp)'), limits = context_genomic_window)+
  # scale_x_continuous(name = paste0('Genomic position (bp)'))+
  scale_y_continuous(name = 'RPM-normalized ChIP-nexus reads')+
  scale_fill_manual(name = 'State',values = c('black', '#fec44f', '#b2182b'))+
  facet_grid(state ~ TF)+
  theme_classic()
nexus_exp.plot

ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_nexus.png'), nexus_exp.plot, width = 7, height = 7)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_experiments_RPM_nexus.pdf'), nexus_exp.plot, width = 7, height = 7)

Visualize contribution

curated_enhancer_boundaries.gr<-GRanges(context_chrom, IRanges(context_genomic_window[1], context_genomic_window[2]))

Generate contribution scores of mapped motifs.

curated_enhancer.df<-data.frame(
  acc_contrib = standard_metapeak_matrix(regions.gr = curated_enhancer_boundaries.gr, sample.cov = 'shap/atac_wt_fold1_atac_counts.bw', 
                                         keep_region_coordinates = T) %>% t(),
  seq = getSeq(bsgenome, curated_enhancer_boundaries.gr, as.character = T) %>% stringr::str_split_1(pattern = ''),
  position = start(curated_enhancer_boundaries.gr):end(curated_enhancer_boundaries.gr)
)

# Generate sequence logo
acc_contrib.plot<-curated_enhancer.df %>%
  tidyr::pivot_wider(names_from = seq, values_from = acc_contrib) %>% 
  dplyr::select(A, C, G, T) %>% as.matrix() %>% t() %>%
ggseqlogo(., method='custom', seq_type='dna') + 
  scale_y_continuous(name = 'Acc. contribution.')+
  scale_x_discrete(name = 'Genomic position (chr10, bp)')+
  ggtitle('CRISPR enhancer')
acc_contrib.plot

ggsave(paste0(figure_filepath, '/crispr_context_candidate_contrib.png'), acc_contrib.plot, height = 2, width = 20)
ggsave(paste0(figure_filepath, '/crispr_context_candidate_contrib.pdf'), acc_contrib.plot, height = 2, width = 20)

Read in contribution scores and plot

seqs.fa<-seqinr::read.fasta('fasta/crispr_context_seqs.fa')

filler<-mclapply(Sys.glob('shap/crispr/crispr_context_*_counts.h5'), function(y){
  contrib.h5<-rhdf5::h5read(y, '/')
  
  hyp_scores.list<-apply(contrib.h5$hyp_scores, 3, function(x){
    as.data.frame(x) %>% transpose()
  })
  input_seqs.list<-apply(contrib.h5$input_seqs, 3, function(x){
    df<-as.data.frame(x)
    df<-dplyr::mutate_all(df, function(x) as.numeric(as.character(x)))
    return(df %>% transpose)
  })
  
  contrib.list<-lapply(1:length(hyp_scores.list), function(x){
    mat<-input_seqs.list[[x]] * hyp_scores.list[[x]]
    colnames(mat)<-c('A','C','G','T')
    return(mat %>% t())
  })
  names(contrib.list)<-names(seqs.fa)
  
  #Visualize contribution scores
  contrib.plot<- ggseqlogo(contrib.list, method='custom', seq_type='dna') + 
    scale_y_continuous(name = 'Contribution') + 
    scale_x_continuous(limits = c(800, 1100)) + 
    facet_wrap(~seq_group, ncol=1, scales='free_x') +
    theme(axis.line = element_line(), axis.ticks = element_line())+
    ggtitle(y)
  
  ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_contribution_', basename(y), '.png'), contrib.plot, width = 16, height = 10)
  ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_contribution_', basename(y), '.pdf'), contrib.plot, width = 16, height = 10)
}, mc.cores = 6)

Supplemental validations

Replicate validation

Compare CRISPR ATAC-seq samples to make sure they’re replicable. So only compare replicates here.

atac_regions.gr<-rtracklayer::import(atac_regions.path) %>% 
  plyranges::mutate(across_Akr1cl = ifelse(overlapsAny(., GRanges(context_chrom, IRanges(start = context_genomic_window[1], end = context_genomic_window[2]))), 'Akr1cl','none'),
                    across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Akr1cl))

#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_atac_reps.path, function(x){
  regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_atac_reps.path

#Consolidate values
crispr_context_cov_w_coord.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_context_cov.df) %>% arrange(desc(across_enhancer))

#Plot the sequencing depth imbalance.
plot.list<-mclapply(crispr_context_atac_reps.path %>% grep('_atac_1_cutsites.bw', ., value = T), function(x){
  crispr_context_cov_w_coord.df$repa<-crispr_context_cov_w_coord.df[[x]]
  crispr_context_cov_w_coord.df$repb<-crispr_context_cov_w_coord.df[[gsub('_1_', '_2_', x)]]

  plot<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
    scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
    ggtitle(basename(x) %>% gsub('_atac_1_cutsites.bw', '', .))+
    theme_classic() + theme(legend.position = 'none')
  
  return(plot)
}, mc.cores = 3)

rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 1, ncol = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_rep_comparison.png'), rep.plot, height = 3, width = 9)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_rep_comparison.pdf'), rep.plot, height = 3, width = 9)

Compare CRISPR ChIP-nexus replicates.

sox2_regions.gr<-rtracklayer::import(sox2_regions.path) %>% 
  plyranges::mutate(across_Akr1cl = ifelse(overlapsAny(., GRanges(context_chrom, IRanges(start = context_genomic_window[1], end = context_genomic_window[2]))), 'Akr1cl','none'),
                    across_enhancer = ifelse(overlapsAny(., dev_enhancers.gr), 'dev_enh', across_Akr1cl))

#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_nexus_reps.path, function(x){
  regionSums(sox2_regions.gr, x) + abs(regionSums(sox2_regions.gr, gsub('positive','negative',x)))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_nexus_reps.path

#Consolidate values
crispr_context_cov_w_coord.df<-cbind(sox2_regions.gr %>% as.data.frame, crispr_context_cov.df) %>% arrange(desc(across_enhancer))

#Plot the sequencing depth imbalance.
plot.list<-mclapply(crispr_context_nexus_reps.path %>% grep('_sox2_nexus_1_positive.bw', ., value = T), function(x){
  crispr_context_cov_w_coord.df$repa<-crispr_context_cov_w_coord.df[[x]]
  crispr_context_cov_w_coord.df$repb<-crispr_context_cov_w_coord.df[[gsub('_1_', '_2_', x)]]
  crispr_context_cov_w_coord.df$repc<-crispr_context_cov_w_coord.df[[gsub('_1_', '_3_', x)]]

  plotab<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repb)))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
    scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
    ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
    theme_classic() + theme(legend.position = 'none')
  plotac<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repa), y = log(repc)))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
    scale_y_continuous(name = paste0('rep 3 log-reads across peaks'))+
    ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
    theme_classic() + theme(legend.position = 'none')
  plotbc<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(repb), y = log(repc)))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0('rep 2 log-reads across peaks'))+
    scale_y_continuous(name = paste0('rep 3 log-reads across peaks'))+
    ggtitle(basename(x) %>% gsub('_sox2_1_cutsites.bw', '', .))+
    theme_classic() + theme(legend.position = 'none')
  
  return(plotab + plotac + plotbc)
}, mc.cores = 3)

rep.plot<-wrap_plots(plot.list) + plot_layout(nrow = 2, ncol = 1)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_rep_comparison.png'), rep.plot, height = 6, width = 9)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_rep_comparison.pdf'), rep.plot, height = 6, width = 9)


plotwt<-ggplot(data = crispr_context_cov_w_coord.df, aes(x = log(`bw/mesc_R1_sox2_nexus_15_positive.bw`), y = log(`bw/mesc_R1_sox2_nexus_16_positive.bw`)))+
  geom_abline(slope = 1, intercept = 0)+
  ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
  # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
  ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
  scale_color_manual(values = c( 'red', 'navy', 'gray'))+
  scale_x_continuous(name = paste0('rep 1 log-reads across peaks'))+
  scale_y_continuous(name = paste0('rep 2 log-reads across peaks'))+
  ggtitle('WT')+
  theme_classic() + theme(legend.position = 'none')
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_wt_rep_comparison.png'), plotwt, height = 3, width = 3)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_nexus_wt_rep_comparison.pdf'), plotwt, height = 3, width = 3)

Developmental enhancer consistency

Do our conditions and CRISPR mutants look consistent across regions of interest that are not the CRISPR locus?

filler<-lapply(1:length(dev_enhancers.gr), function(x){
  
  metapeak.df<-mclapply(crispr_context_atac_combined.path, function(y){
    standard_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), y, downstream = 501, upstream = 500) %>%
      dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name)
  }, mc.cores = 8) %>% rbindlist() %>%
    dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Akr1cl_scenario_', '', .) %>% gsub('mesc_Btbd11_scenario_', '', .) %>% gsub('_atac_combined_normalized.bw', '', .),
                  genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
    tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = 'context_|coop_', remove = F) %>%
    dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B')))
    
  
  enhancer_name<-dev_enhancers.gr[x]$name
  plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state))+
    geom_line()+
    scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) + 
    scale_y_continuous(name = 'RPM-normalized ATAC-seq fragments', limits = c(0, 5))+
    scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
    ggtitle(enhancer_name)+
    theme_classic()
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_comparison.png'), plot, height = 4, width = 4)
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_comparison.pdf'), plot, height = 4, width = 4)
  return(NULL)
})

Plot ChIP-nexus replicates as well

filler<-lapply(1:length(dev_enhancers.gr), function(x){
  
  metapeak.df<-mclapply(crispr_context_nexus_combined.path, function(y){
    exo_metapeak(resize(dev_enhancers.gr[x], 1, 'center'), list(pos = y, neg = gsub('positive','negative', y)), downstream = 501, upstream = 500) %>%
      dplyr::mutate(path = y, enhancer = dev_enhancers.gr[x]$name) 
  }, mc.cores = 8) %>% rbindlist() %>%
    dplyr::mutate(scenario_state = basename(path) %>% gsub('mesc_Akr1cl_scenario_', '', .) %>% gsub('mesc_R1', 'context_WT', .) %>% gsub('_sox2_nexus_combined_normalized_positive.bw', '', .),
                  genomic_position = tss_distance + start(resize(dev_enhancers.gr[x], 1, 'center'))) %>%
    tidyr::separate(col = 'scenario_state', into = c('scenario','state'), sep = 'context_|coop_', remove = F) %>%
    dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B')))
  # metapeak.df$reads[metapeak.df$strand=='-']<-metapeak.df$reads[metapeak.df$strand=='-']*-1
  
  enhancer_name<-dev_enhancers.gr[x]$name
  plot<-ggplot(metapeak.df, aes(x = genomic_position, y = reads, color = state, group = interaction(strand, state)))+
    geom_line()+
    scale_x_continuous(name = seqnames(dev_enhancers.gr[x])) + 
    scale_y_continuous(name = 'RPM-normalized sox2-seq fragments')+
    scale_colour_manual(values = turbo(4, begin = .2, end = .8))+
    ggtitle(enhancer_name)+
    theme_classic()
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_nexus_comparison.png'), plot, height = 4, width = 4)
  ggsave(paste0(figure_filepath, '/enhancers/', enhancer_name, '_Akr1cl_crispr_context_nexus_comparison.pdf'), plot, height = 4, width = 4)
  return(NULL)
})

CRISPR mutant differential enrichment

Run DESeq2 in order to measure whether CRISPR scenarios are significantly differentially enriched when we do these mutations. Since we are comparing a few features, we will contrast a couple assessments first.

#Collect coverage
crispr_context_cov.df<-mclapply(crispr_context_atac_reps.path, function(x){
  regionSums(atac_regions.gr, x)
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov.df)<-crispr_context_atac_reps.path

#Format organization
atac_labels.df<-data.frame(filename = colnames(crispr_context_cov.df)) %>%
  dplyr::mutate(state = basename(crispr_context_atac_reps.path) %>% gsub('mesc_Akr1cl_scenario_context_', '', .) %>% gsub('mesc_Btbd11_scenario_WT_coop_', '', .) %>% gsub('_cutsites.bw', '', .)) %>%
  # tidyr::separate(state, into = c('timepoint','replicate'), remove = F)
    tidyr::separate(col = 'state', into = c('state','rep'), sep = '_atac_', remove = T) %>%
    dplyr::mutate(state = factor(gsub('mut_', '', state), c('WT','A','B','null')))
rownames(atac_labels.df)<-crispr_context_atac_reps.path
atac_labels.df
##                                                                                                          filename
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw       bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw
##                                                          state rep
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_1_cutsites.bw     A   1
## bw/mesc_Akr1cl_scenario_context_mut_A_atac_2_cutsites.bw     A   2
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_1_cutsites.bw     B   1
## bw/mesc_Akr1cl_scenario_context_mut_B_atac_2_cutsites.bw     B   2
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_1_cutsites.bw       WT   1
## bw/mesc_Btbd11_scenario_WT_coop_WT_atac_2_cutsites.bw       WT   2
#Consolidate counts
atac.df<-crispr_context_cov.df
colnames(atac.df)<-crispr_context_atac_reps.path

#Run DESeq2
dds <- DESeqDataSetFromMatrix(countData = atac.df,
                              colData = atac_labels.df,
                              design = ~ state)
dds <- DESeq(dds)

Show enrichment results.

deseq_comparisons.df<-lapply(c('A','B'), function(y){
  res <- results(dds, contrast = c('state', 'WT', y)) %>%
    cbind(., atac_regions.gr %>% as.data.frame()) %>% as.data.frame %>% 
    dplyr::mutate(signif = cut(padj, breaks = c(0, 0.001, 0.01, 0.05, 1), include.lowest = T, labels = c('***','**','*',''))) %>%
    dplyr::filter(across_enhancer == 'Akr1cl') %>% 
    dplyr::mutate(state = y, comparison = paste0('Akr1cl enhancer: WT vs ', y))
}) %>% rbindlist()

deseq_comparisons.df
##    baseMean log2FoldChange      lfcSE      stat       pvalue         padj
##       <num>          <num>      <num>     <num>        <num>        <num>
## 1: 931.3056      1.6677730 0.09883465 16.874377 6.945437e-64 4.114899e-61
## 2: 931.3056      0.2282224 0.08967963  2.544863 1.093206e-02 2.882342e-02
##    seqnames    start      end width strand   name score signalValue  pValue
##      <fctr>    <int>    <int> <int> <fctr> <char> <num>       <num>   <num>
## 1:     chr1 65037228 65038227  1000      *   <NA>  1000      8.6952 385.814
## 2:     chr1 65037228 65038227  1000      *   <NA>  1000      8.6952 385.814
##     qValue  peak across_Akr1cl across_enhancer signif  state
##      <num> <int>        <char>          <char> <fctr> <char>
## 1: 382.556   500        Akr1cl          Akr1cl    ***      A
## 2: 382.556   500        Akr1cl          Akr1cl      *      B
##                  comparison
##                      <char>
## 1: Akr1cl enhancer: WT vs A
## 2: Akr1cl enhancer: WT vs B

Visualize comparison plots with statistics attached:

#Collect coverage
crispr_context_cov_combined.df<-mclapply(crispr_context_atac_combined.path, function(x){
  log(regionSums(atac_regions.gr, x))
}, mc.cores = 4) %>% as.data.frame
colnames(crispr_context_cov_combined.df)<-crispr_context_atac_combined.path

#Consolidate values
crispr_context_cov_w_coord_combined.df<-cbind(atac_regions.gr %>% as.data.frame, crispr_context_cov_combined.df) %>% arrange(desc(across_enhancer))


#Plot the sequencing depth imbalance.
compare.plot.list<-lapply(c('A','B'), function(y){
  
  wt_sample<-paste0('bw/mesc_Btbd11_scenario_WT_coop_WT_atac_combined_normalized.bw')
  mut_sample<-paste0('bw/mesc_Akr1cl_scenario_context_mut_', y, '_atac_combined_normalized.bw')
  
  deseq.df<-deseq_comparisons.df %>% dplyr::filter(state ==y) %>%
    data.frame(label = paste0('p=', .$padj, ', ', .$signif))
  
  plot<-ggplot(data = crispr_context_cov_w_coord_combined.df, aes(x = .data[[wt_sample]], y = .data[[mut_sample]]))+
    geom_abline(slope = 1, intercept = 0)+
    ggrastr::geom_point_rast(aes(color = across_enhancer), size = .1)+
    # geom_point(data = crispr_context_cov_w_coord.df %>% dplyr::filter(across_Akr1cl_enhancer), aes(x = log(repa), y = log(repb)), size = .2, color = 'red')+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    geom_text(data = deseq.df, aes(x = 8, y = 4, label = label))+
    ggpubr::stat_cor(method = 'spearman', label.x.npc = 'left', label.y.npc = 'top')+
    scale_color_manual(values = c( 'red', 'navy', 'gray'))+
    scale_x_continuous(name = paste0(' log-reads across peaks'))+
    scale_y_continuous(name = paste0(' log-reads across peaks'))+
    ggtitle(paste0('Scenario context WT vs ', y, ' comparisons'))+
    theme_classic() + theme(legend.position = 'none')
  return(plot)
})

compare.plot<-wrap_plots(compare.plot.list) + plot_layout(nrow = 1, ncol = 2)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_diff_comparison.png'), compare.plot, height = 3, width = 6)
ggsave(paste0(figure_filepath, '/Akr1cl_crispr_context_diff_comparison.pdf'), compare.plot, height = 3, width = 6)

Session Information

For the purposes of reproducibility, the R/Bioconductor session information is printed below:

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Rocky Linux 8.9 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Chicago
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] DESeq2_1.44.0                            
##  [2] SummarizedExperiment_1.34.0              
##  [3] MatrixGenerics_1.16.0                    
##  [4] matrixStats_1.5.0                        
##  [5] rhdf5_2.48.0                             
##  [6] ggseqlogo_0.2                            
##  [7] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
##  [8] GenomicFeatures_1.56.0                   
##  [9] AnnotationDbi_1.66.0                     
## [10] Biobase_2.64.0                           
## [11] BSgenome.Mmusculus.UCSC.mm10_1.4.3       
## [12] BSgenome_1.72.0                          
## [13] BiocIO_1.14.0                            
## [14] viridis_0.6.5                            
## [15] viridisLite_0.4.2                        
## [16] digest_0.6.37                            
## [17] pander_0.6.6                             
## [18] lattice_0.22-6                           
## [19] testit_0.13                              
## [20] readr_2.1.5                              
## [21] patchwork_1.3.0                          
## [22] data.table_1.17.0                        
## [23] dplyr_1.1.4                              
## [24] Rsamtools_2.20.0                         
## [25] plyranges_1.24.0                         
## [26] reshape2_1.4.4                           
## [27] ggplot2_3.5.2                            
## [28] Biostrings_2.72.1                        
## [29] XVector_0.44.0                           
## [30] magrittr_2.0.3                           
## [31] rtracklayer_1.64.0                       
## [32] GenomicRanges_1.56.2                     
## [33] GenomeInfoDb_1.40.1                      
## [34] IRanges_2.38.1                           
## [35] S4Vectors_0.42.1                         
## [36] BiocGenerics_0.50.0                      
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.17.1        jsonlite_1.8.9           ggbeeswarm_0.7.2        
##  [4] farver_2.1.2             rmarkdown_2.29           zlibbioc_1.50.0         
##  [7] ragg_1.3.3               vctrs_0.6.5              Cairo_1.6-2             
## [10] memoise_2.0.1            RCurl_1.98-1.16          rstatix_0.7.2           
## [13] htmltools_0.5.8.1        S4Arrays_1.4.1           curl_6.2.1              
## [16] broom_1.0.7              cellranger_1.1.0         Rhdf5lib_1.26.0         
## [19] Formula_1.2-5            SparseArray_1.4.8        sass_0.4.9              
## [22] bslib_0.8.0              plyr_1.8.9               cachem_1.1.0            
## [25] GenomicAlignments_1.40.0 lifecycle_1.0.4          pkgconfig_2.0.3         
## [28] Matrix_1.7-0             R6_2.5.1                 fastmap_1.2.0           
## [31] GenomeInfoDbData_1.2.12  colorspace_2.1-1         textshaping_1.0.0       
## [34] RSQLite_2.3.9            ggpubr_0.6.0             labeling_0.4.3          
## [37] httr_1.4.7               abind_1.4-8              mgcv_1.9-1              
## [40] compiler_4.4.1           bit64_4.5.2              withr_3.0.2             
## [43] backports_1.5.0          BiocParallel_1.38.0      carData_3.0-5           
## [46] DBI_1.2.3                ggsignif_0.6.4           MASS_7.3-60.2           
## [49] DelayedArray_0.30.1      rjson_0.2.23             tools_4.4.1             
## [52] vipor_0.4.7              beeswarm_0.4.0           glue_1.8.0              
## [55] restfulr_0.0.15          nlme_3.1-164             rhdf5filters_1.16.0     
## [58] grid_4.4.1               ade4_1.7-23              generics_0.1.3          
## [61] seqinr_4.2-36            gtable_0.3.6             tzdb_0.4.0              
## [64] tidyr_1.3.1              hms_1.1.3                car_3.1-3               
## [67] utf8_1.2.4               pillar_1.10.2            stringr_1.5.1           
## [70] vroom_1.6.5              splines_4.4.1            bit_4.6.0               
## [73] tidyselect_1.2.1         locfit_1.5-9.10          knitr_1.50              
## [76] gridExtra_2.3            xfun_0.51                stringi_1.8.4           
## [79] UCSC.utils_1.0.0         yaml_2.3.10              evaluate_1.0.3          
## [82] codetools_0.2-20         tibble_3.2.1             cli_3.6.3               
## [85] systemfonts_1.2.3        munsell_0.5.1            jquerylib_0.1.4         
## [88] Rcpp_1.0.14              readxl_1.4.5             png_0.1-8               
## [91] XML_3.99-0.18            ggrastr_1.0.2            blob_1.2.4              
## [94] bitops_1.0-9             scales_1.3.0             purrr_1.0.2             
## [97] crayon_1.5.3             rlang_1.1.4              KEGGREST_1.44.1